### analysis for indonesia ##
# preamble #### 
rm(list=ls())

library(foreign)
library(survey)
library(xtable)
library(magrittr)
library(readxl)
library(haven)
library(fixest)
library(sandwich)
library(estimatr)
library(tidyverse)


setwd("~/Dropbox/facebook sampling/replication/indonesia replication")
data_exports<-"../data_exports/"

set.seed=1234

load("indonesia data/indonesia_facebook_cleaned_weighted.Rda")
d1<-d%>%filter(wave=="wave1") 
d%<>%filter(wave=="wave2") ## restrict to 2nd wave, since this is what we use for most analyses 
paperfigs<-"figures/"

# assess non-response #### 
## overall incomplete ratio
nrow(incompletes)/(nrow(incompletes)+nrow(d))

## calculate sample sizes after excluding those without fb-assigned demographics
d%<>%mutate(type="Respondent")%>%
  mutate(cell=paste(fb_region,fb_gender,fb_agegroup,sep="."))
incompletes%<>%mutate(type=ifelse(duration.minutes<5,"Speeder","Non-respondent"))%>%
  mutate(cell=paste(fb_region,fb_gender,fb_agegroup,sep="."))

ninrtab<-bind_rows(d%>%select(all_of(names(incompletes))),
          incompletes%>%filter(wave=="wave2"))%>%
  select(-duration.minutes,-wave,-fb_region,-cell)%>% ## could leave "geoFB" in if wanted to focus on micro-regions, but there are lots of municipalities and 4 regions makes the table tractable. 
  filter(!is.na(fb_agegroup),!is.na(fb_gender))
# ninr.n<-nrow(ninrtab)
ninr.n<-ninrtab%>%
  pivot_longer(cols=-type,names_to="variable")%>%
  group_by(variable)%>%
  count(value)%>%
  rename(ninr.n=n)
ninrtab2<-ninrtab%>%
  pivot_longer(cols=-type,names_to="variable")%>%
  group_by(variable,type)%>%
  count(value)%>%
  left_join(ninr.n,by=c("variable","value"))%>%
  # mutate(percent=n*100/ninr.n)%>%
  mutate(percent=n*100/ninr.n)%>%
  select(-n,-ninr.n)%>%
  arrange(variable,value,type)%>%
  pivot_wider(id_cols=c("variable","value"),names_from="type",
              values_from="percent")%>%
  mutate(variable=gsub("fb_","",variable))%>%
  select(variable,value,Respondent,`Non-respondent`,Speeder)
print.xtable(xtable(ninrtab2),include.rownames = FALSE)

ninrtab%<>%mutate(attrition=ifelse(type=="Respondent",0,1),
                  fb_agegroup=relevel(factor(fb_agegroup),ref="21-29"))

## figure 3: predictors of non-response #### 
## figure created in separate script, data underlying it are saved here. 
fit<-lm_robust(attrition~fb_agegroup+fb_gender,data=ninrtab)
## save dataframe of results to include in Figure 3 
fit<-broom::tidy(fit)
write_csv(fit,file=paste0(data_exports,"fig3_attritionreg_indonesia.csv"))

# check self-reported vs. FB-reported demographic and geographic match #### 
demos<-d%>%
  select(ResponseId,demo_gender_orig,demo_region,demo_agegroup_orig,
         fb_agegroup,fb_gender,fb_region,demo_region_recode,cell)
nrow(demos[is.na(demos$demo_region_recode),])

demos%<>%mutate(
  agematch=ifelse(demo_agegroup_orig==fb_agegroup,1,0),
  gendermatch=ifelse(demo_gender_orig==fb_gender,1,0),
  geomatch=ifelse(demo_region_recode==fb_region,1,0)
)
nrow(d%>%filter(is.na(demo_age)))
nrow(demos%>%filter(is.na(fb_agegroup))) 
nrow(demos%>%filter(is.na(fb_gender))) 
nrow(demos%>%filter(is.na(fb_agegroup))) ## These are due to link shares 
demos%<>%filter(!is.na(fb_gender),!is.na(fb_region),!is.na(fb_agegroup))

demo.sum<-do.call(rbind,list(data.frame(prop.table(table(demos$agematch))*100),
                             data.frame(prop.table(table(demos$gendermatch))*100),
                             data.frame(prop.table(table(demos$geomatch))*100)))%>%
  filter(Var1==1)%>%
  select(-Var1)%>%
  rename("Percent correct"=Freq)
demo.sum$Characteristic<-c("Age","Gender","Location")

demos%<>%filter(!is.na(fb_gender),!is.na(fb_agegroup),!is.na(fb_region),
         fb_gender!="'--Sanitized",fb_agegroup!="'--Sanitized",fb_region!="'--Sanitized")

cellsum<-demos%>%
  count(cell)

## data for table 2 #### 
print(demo.sum) ## this data is manually inserted into table 2 in the tex file 

## create data frame of demographics for ad clickers 
adclickers<-demos%>%select(fb_agegroup,fb_gender,fb_region,cell)%>% ## these are the survey completers
  bind_rows(incompletes%>%filter(wave=="wave 2")%>%select(fb_agegroup,fb_gender,fb_region,cell)) ## these are the incompletes

rm(demo.sum,demos)

# figures 4 and 5: political behavior and public opinion #### 
## FB political summaries #### 
## function to create summaries with unweighted data 
demosum.unwtd<-function(var="demo_gender",sample="Facebook (unweighted)",data=d){
  dsub<-data%>%
    select(all_of(var))%>%
    fastDummies::dummy_cols()%>%
    select(-all_of(var))
  dsub<-svydesign(ids=~0,data=dsub)
  vars<-names(dsub$variables)
  dsub<-data.frame(svymean(make.formula(vars),dsub,na.rm=TRUE))
  dsub$var<-rownames(dsub)
  dsub%<>%separate(var,into=c("var","term"),sep="_",remove=TRUE)
  names(dsub)[1:2]<-c("estimate","std.error")
  dsub$sample<-sample
  return(dsub)
}
protest.fb.unwtd<-demosum.unwtd("Protested")
meeting.fb.unwtd<-demosum.unwtd("Meeting")%>%
  mutate(var="Attended a meeting")
petition.fb.unwtd<-demosum.unwtd("Petition")%>%mutate(var="Signed a petition")
online.fb.unwtd<-demosum.unwtd("OnlinePost")%>%
  mutate(var="Posted online")

happening.fb.unwtd<-demosum.unwtd("gwhappening")
human.fb.unwtd<-demosum.unwtd("gwhuman")
important.fb.unwtd<-demosum.unwtd("gwimportant")
frequentvoter.fb.unwtd<-demosum.unwtd("frequentvoter")%>%mutate(var="Frequent voter")
presidentparty.fb.unwtd<-demosum.unwtd("presidentparty")%>%mutate(var="President's party") ## note in text that this is only based on the 2nd wave of data
vote.fb.unwtd<-demosum.unwtd("votelastelection")%>%mutate(var="Voted last election")

## function to create summaries with weighted data
demosum.wtd<-function(var="demo_gender",sample="Facebook (weighted)",weights=d$weight_trimmed,data=d){
  dsub<-data%>%
    select(all_of(var))%>%
    fastDummies::dummy_cols()%>%
    select(-all_of(var))
  dsub<-svydesign(ids=~0,data=dsub,weights=weights)
  vars<-names(dsub$variables)
  dsub<-data.frame(svymean(make.formula(vars),dsub,na.rm=TRUE))
  dsub$var<-rownames(dsub)
  dsub%<>%separate(var,into=c("var","term"),sep="_",remove=TRUE)
  names(dsub)[1:2]<-c("estimate","std.error")
  dsub$sample<-sample
  return(dsub)
}
protest.fb.wtd<-demosum.wtd("Protested")
meeting.fb.wtd<-demosum.wtd("Meeting")%>%mutate(var="Attended a meeting")
petition.fb.wtd<-demosum.wtd("Petition")%>%mutate(var="Signed a petition")
frequentvoter.fb.wtd<-demosum.wtd("frequentvoter")%>%mutate(var="Frequent voter")
presidentparty.fb.wtd<-demosum.wtd("presidentparty",sample="Facebook (weighted)")%>%
  mutate(var="President's party")

online.fb.wtd<-demosum.wtd("OnlinePost")%>%
  mutate(var="Posted online")
happening.fb.wtd<-demosum.wtd("gwhappening")
human.fb.wtd<-demosum.wtd("gwhuman")
important.fb.wtd<-demosum.wtd("gwimportant")
vote.fb.wtd<-demosum.wtd("votelastelection")%>%mutate(var="Voted last election")

## AB political summaries #### 
asiabaro<-read_csv("indonesia data/asianbarometer_cleaned.csv")
protest.ab<-demosum.unwtd(var="Protested",sample="Asiabarometer",data=asiabaro)
meeting.ab<-demosum.unwtd("Meeting","Asiabarometer",asiabaro)%>%mutate(var="Attended a meeting")
petition.ab<-demosum.unwtd("Petition","Asiabarometer",asiabaro)%>%mutate(var="Signed a petition")
online.ab<-demosum.unwtd("OnlinePost","Asiabarometer",asiabaro)%>%
  mutate(var="Posted online")
voting.ab<-demosum.unwtd("votelastelection","Asiabarometer",asiabaro)%>%mutate(var="Voted last election")

asiabaro%<>%mutate(frequentvoter=ifelse(votinghabit%in%c("most","every"),"frequent","infrequent"))
frequentvoter.ab<-demosum.unwtd("frequentvoter","Asiabarometer",asiabaro)%>%mutate(var="Frequent voter")

asiabaro%<>%mutate(presidentparty=ifelse(party=="PDI-P","pdip","otherparty"))
presidentparty.ab<-demosum.unwtd("presidentparty","Asiabarometer",asiabaro)%>%mutate(var="President's party")


## dynata political summaries #### 
dynata<-read_csv("indonesia data/dynata_cleaned.csv")

dynata%<>%
  mutate(gw_human=case_when(gw_human=="Caused mostly by human activities"~"Human",
                            gw_human=="Caused mostly by natural changes in the environment"~"Natural",
                            grepl("None",gw_human)==TRUE~"Nothappening"),
         gw_important=gsub(" ","",gw_important))
dynata%<>%rename(gwhappening=gw_happen,
                 gwhuman=gw_human,
                 gwimportant=gw_important)
table(dynata$gwhappening)
table(dynata$gwhuman)
table(dynata$gwimportant)
human.dynata.unwtd<-demosum.unwtd("gwhuman",sample="Dynata (unweighted)",data=dynata)
human.dynata.wtd<-demosum.wtd("gwhuman","Dynata (weighted)",dynata$weight_trimmed,dynata)
happening.dynata.unwtd<-demosum.unwtd("gwhappening",sample="Dynata (unweighted)",data=dynata)
happening.dynata.wtd<-demosum.wtd("gwhappening","Dynata (weighted)",dynata$weight_trimmed,dynata)
important.dynata.unwtd<-demosum.unwtd("gwimportant",sample="Dynata (unweighted)",data=dynata)
important.dynata.wtd<-demosum.wtd("gwimportant","Dynata (weighted)",dynata$weight_trimmed,dynata)

dynata%<>%mutate(frequentvoter=ifelse(votinghabit%in%c("most","every"),"frequent","infrequent"))
frequentvoter.dynata.unwtd<-demosum.unwtd("frequentvoter","Dynata (unweighted)",dynata)%>%mutate(var="Frequent voter")
frequentvoter.dynata.wtd<-demosum.wtd("frequentvoter","Dynata (weighted)",dynata$weight_trimmed,dynata)%>%mutate(var="Frequent voter")

voting.census<-data.frame(estimate=.791,std.error=0,var="Voted last election",term="Yes",sample="Election results") ## note that this doesn't actually come from census 
political<-do.call(rbind,list(online.ab,petition.ab,protest.ab,meeting.ab,
                              online.fb.unwtd,petition.fb.unwtd,protest.fb.unwtd,meeting.fb.unwtd,
                              online.fb.wtd,petition.fb.wtd,protest.fb.wtd,meeting.fb.wtd,voting.census,
                              voting.ab,vote.fb.unwtd,vote.fb.wtd,
                              frequentvoter.ab,frequentvoter.dynata.unwtd,frequentvoter.dynata.wtd,
                              frequentvoter.fb.unwtd,frequentvoter.fb.wtd,
                              human.fb.unwtd,human.fb.wtd,happening.fb.unwtd,happening.fb.wtd,
                              important.fb.unwtd,important.fb.wtd,human.dynata.unwtd,human.dynata.wtd,
                              happening.dynata.unwtd,happening.dynata.wtd,important.dynata.unwtd,important.dynata.wtd,
                              presidentparty.ab,presidentparty.fb.unwtd,presidentparty.fb.wtd))
## manually add a "yes" row with estimate=0 for petition signers from AB survey 
political<-rbind(political,data.frame(estimate=0,std.error=0,var="Signed a petition",term="Yes",sample="Asiabarometer")) ## REMEMBER TO NOTE THIS IN THE WRITEUP 
## reformat political object so that it can be made into figures ##
political%<>%
  filter(term!="No",term!="DK",!is.na(term),term!="NA",
         term!="Nothappening",term!="Natural")%>% ## create single values for "global warming is happening" and political action questions
  mutate(term=ifelse(term=="yes"|term=="Yes",var,term),
         # var=ifelse(var=="partycollapsed","party",var),
         term=case_when(term=="Human"~"Climate change is human caused",
                        # term=="Natural"~"Climate change is due to natural causes",
                        # term=="Nothappening"~"Climate change is not happening",
                        term=="gwhappening"~"Climate change is happening",
                        term=="Extremelyimportant"~"Climate change extremely important",
                        term=="Notatallimportant"~"Climate change not at all important",
                        term=="Nottooimportant"~"Climate change not too important",
                        term=="Somewhatimportant"~"Climate change somewhat important",
                        term=="Veryimportant"~"Climate change very important",
                        term=="frequent"~"Vote in most or every election",
                        term=="infrequent"~"Rarely or never vote",
                        term=="pdip"~"President's party (PDI-P)",
                        term=="otherparty"~"Other party",
                        var=="Voted last election"~"Voted in 2019 election",
                        TRUE~term))
## data for Figure 4 and Figure 5 #### 
write_csv(political,paste0(data_exports,"fig4-5_indonesia_policy_behavior.csv"))


# Figure 2: demographic summaries #### 
## FB demographic summaries #### 
d%<>%
  rename(demogender=demo_gender,
         demoreligion=dem_rel,
         demoeducation=degreerecode,
         demoagegroup=demo_agegroup)%>%
  mutate(demoagegroup=gsub("-","",demoagegroup),
         demoagegroup=ifelse(demoagegroup=="60+","60",demoagegroup))
d%<>%mutate(demoreligionrecode=case_when(demoreligion%in%c("Christian","Catholic")~"Christian",
                                         demoreligion%in%c("Buddha","Confucianism","Hindus","Other")~"Other",
                                         demoreligion=="Islam"~"Muslim",
                                         TRUE~demoreligion))
table(d$demoreligionrecode)
table(d$dem_maritial)

d%<>%mutate(marital=case_when(dem_maritial%in%c("Divorced","Separated")~"Divorced",
                              dem_maritial%in%c("Living together as married","Married")~"Married",
                              dem_maritial%in%c("Single","Widowed")~"Single"))
table(d$marital)

table(d$demoeducation)
marital.fb.unwtd<-demosum.unwtd("marital")
gender.fb.unwtd<-demosum.unwtd("demogender")
religion.fb.unwtd<-demosum.unwtd("demoreligionrecode")
education.fb.unwtd<-demosum.unwtd("demoeducation")%>%
  mutate(term=case_when(term=="Postsecondary"~"College degree or higher",
                        term=="secondary"~"Secondary degree",
                        term=="nosecondary"~"Did not complete secondary school"))
agegroup.fb.unwtd<-demosum.unwtd("demoagegroup")

marital.fb.wtd<-demosum.wtd("marital")
gender.fb.wtd<-demosum.wtd("demogender")
religion.fb.wtd<-demosum.wtd("demoreligionrecode")
education.fb.wtd<-demosum.wtd("demoeducation")%>%
  mutate(term=case_when(term=="Postsecondary"~"College degree or higher",
                        term=="secondary"~"Secondary degree",
                        term=="nosecondary"~"Did not complete secondary school"))
agegroup.fb.wtd<-demosum.wtd("demoagegroup")

## FB ad clickers demographic summaries #### 
adclickers%<>%filter(!is.na(fb_agegroup),!is.na(fb_gender))%>%
  mutate(fb_agegroup=gsub("_","",fb_agegroup),
         fb_agegroup=gsub("-","",fb_agegroup),
         fb_agegroup=ifelse(fb_agegroup=="60+","60",fb_agegroup))%>%
  rename(fbagegroup=fb_agegroup,
         fbgender=fb_gender)
agegroup.adclickers<-demosum.unwtd(var="fbagegroup",sample="Facebook (ad clickers)",data=adclickers)
gender.adclickers<-demosum.unwtd(var="fbgender",sample="Facebook (ad clickers)",data=adclickers)

## AB demographic summaries #### 
asiabaro%<>%
  mutate(agegroup=gsub("-","",agegroup),
         agegroup=ifelse(agegroup=="60+","60",agegroup))%>%
  filter(!is.na(agegroup)) ## lose 24 responses here
asiabaro%<>%mutate(religionrecode=
                     case_when(religion=="Islam"~"Muslim",
                               religion%in%c("Buddha","Confucianism","Hindu","Others")~"Other",
                               religion%in%c("Protestant","Catholic")~"Christian"))

gender.ab<-demosum.unwtd(var="gender",sample="Asiabarometer",data=asiabaro)
religion.ab<-demosum.unwtd(var="religionrecode",sample="Asiabarometer",data=asiabaro)
asiabaro%<>%rename(demoeducation=dem_edu)
asiabaro%<>%
  mutate(edrecode=case_when(demoeducation%in%c("None","Primary")~"nosecondary",
                            demoeducation=="Secondary"~"secondary",
                            demoeducation=="University"~"Postsecondary",
                            TRUE~demoeducation))
table(asiabaro$edrecode)
education.ab<-demosum.unwtd(var="edrecode",sample="Asiabarometer",data=asiabaro)%>%
  mutate(term=case_when(term=="Postsecondary"~"College degree or higher",
                        term=="secondary"~"Secondary degree",
                        term=="nosecondary"~"Did not complete secondary school"))
agegroup.ab<-demosum.unwtd(var="agegroup",sample="Asiabarometer",data=asiabaro)

# dynata demographic summaries #### 
dynata%<>%
  rename(demogender=demo_gender,
         demoeducation=degreerecode,
         demoagegroup=demo_agegroup)
dynata%<>%mutate(
  demoagegroup=gsub("-","",demoagegroup),
  demoagegroup=ifelse(demoagegroup=="60+","60",demoagegroup)
)

gender.dynata.wtd<-demosum.wtd("demogender",sample="Dynata (weighted)",weights=dynata$weight_trimmed,data=dynata)
education.dynata.wtd<-demosum.wtd("demoeducation",sample="Dynata (weighted)",weights=dynata$weight_trimmed,data=dynata)%>%
  mutate(term=case_when(term=="Postsecondary"~"College degree or higher",
                        term=="secondary"~"Secondary degree",
                        term=="nosecondary"~"Did not complete secondary school"))
agegroup.dynata.wtd<-demosum.wtd("demoagegroup",sample="Dynata (weighted)",weights=dynata$weight_trimmed,data=dynata)

gender.dynata.unwtd<-demosum.unwtd("demogender",sample="Dynata (unweighted)",data=dynata)
education.dynata.unwtd<-demosum.unwtd("demoeducation",sample="Dynata (unweighted)",data=dynata)%>%
  mutate(term=case_when(term=="Postsecondary"~"College degree or higher",
                        term=="secondary"~"Secondary degree",
                        term=="nosecondary"~"Did not complete secondary school"))
agegroup.dynata.unwtd<-demosum.unwtd("demoagegroup",sample="Dynata (unweighted)",data=dynata)

table(dynata$dem_marital)
dynata%<>%mutate(marital=case_when(dem_marital%in%c("Divorced","Separated")~"Divorced",
                                   dem_marital%in%c("Single","Widowed")~"Single",
                                   dem_marital%in%c("Married","Living together as married")~"Married"))
marital.dynata.wtd<-demosum.wtd("marital","Dynata (weighted)",dynata$weight_trimmed,dynata)
marital.dynata.unwtd<-demosum.unwtd("marital","Dynata (unweighted)",dynata)
## census demographics #### 
## merge in # of households
households<-read_csv("indonesia data/households.csv")
households%<>%
  mutate(households=`households (thousands)`*1000,
         Province=str_to_title(Province),
         Province=case_when(Province=="Kep. Bangka Belitung"~"Kepulauan Bangka Belitung",
                            Province=="Kep. Riau"~"Kepulauan Riau",
                            Province=="Dki Jakarta"~"Daerah Khusus Ibu Kota Jakarta",
                            Province=="Di Yogyakarta"~"Daerah Istimewa Yogyakarta",
                            TRUE~Province))%>%
  filter(Province!="Indonesia",!is.na(Province))#%>%

# marital status from census 
marital.census<-read_csv("indonesia data/marital.csv")
marital.census%<>%mutate(Province=gsub("[[:digit:]]+","",Province),
                         Province=gsub("[[:punct:]]","",Province),
                         Province=str_squish(Province))%>%
  filter(!is.na(Province))
marital.total<-sum(marital.census$Total)

marital.census%<>%
  select(-Total)%>%
  filter(!is.na(Province))%>%
  pivot_longer(Single:Widowed,names_to="term",values_to="freq")%>%
  mutate(term=ifelse(term=="Widowed","Single",term))%>%
  group_by(term)%>%
  summarise(estimate=sum(freq)/marital.total,
            std.error=0,
            var="marital",
            sample="Census")


## religion data from census 
## population from census 
regionrecodes<-d%>%
  select(demo_region,demo_region_recode)%>%distinct()
religion.census<-read_csv("indonesia data/religion.csv")%>%
  filter(!is.na(province))%>%
  left_join(regionrecodes,by=c("province"="demo_region_recode"))
religion.census%<>%
  select(-province)%>%
  pivot_longer(-demo_region,values_to="Freq",names_to="religion")%>%
  mutate(term=case_when(religion=="Christian"|religion=="Catholic"~"Christian",
                            religion=="Islam"~"Muslim",
                            religion%in%c("Christian","Catholic","Islam")==FALSE~"Other"))%>%
  group_by(term)%>%
  summarise(Freq=sum(Freq))
religiontotal<-sum(religion.census$Freq)
religion.census%<>%
  mutate(estimate=Freq/religiontotal,
         var="demoreligionrecode",
         sample="Census",
         std.error=0)%>%
  select(-Freq)

## gender, age data from census 
load("indonesia data/weightingdata_indonesia.Rda")
gender.census<-gender%>%
  mutate(estimate=Freq/sum(gender$Freq),
         std.error=0,
         var="gender",
         sample="Census")%>%
  rename(term=demo_gender)%>%
  select(-Freq)

age.census<-age%>%
  mutate(estimate=Freq/sum(age$Freq),
         std.error=0,
         var="agegroup",
         sample="Census")%>%
  rename(term=demo_agegroup)%>%
  select(-Freq)
degree.census%<>%
  mutate(term=case_when(term=="Postsecondary"~"College degree or higher",
                        term=="secondary"~"Secondary degree",
                        term=="nosecondary"~"Did not complete secondary school"))

demosum<-do.call(rbind,list(degree.census,education.fb.wtd,education.fb.unwtd,
                            gender.fb.unwtd,gender.fb.wtd,gender.ab,
                            religion.fb.unwtd,religion.fb.wtd,religion.ab,religion.census,
                            education.ab,
                            gender.dynata.wtd,gender.dynata.unwtd,
                            education.dynata.wtd,education.dynata.unwtd,
                            agegroup.dynata.wtd,agegroup.dynata.unwtd,
                            agegroup.ab,agegroup.fb.unwtd,agegroup.fb.wtd,
                            gender.census,age.census,marital.fb.unwtd,marital.fb.wtd,
                            marital.dynata.unwtd,marital.dynata.wtd,
                            marital.census,gender.adclickers,agegroup.adclickers
                            ))%>%
  mutate(ci.high=estimate+1.96*std.error,
                  ci.low=estimate-1.96*std.error)%>%
  filter(term!="Male")%>%
  mutate(term=case_when(
    term=="60"~"60+",
    term=="3049"~"30-49",
    term=="5059"~"50-59",
    term=="1829"~"18-29",
    term=="2129"~"21-29",
    term=="Other"~"Other religion",
    TRUE~term))

## data for figure 2 (demographics) #### 
write_csv(demosum,paste0(data_exports,"fig2_demos_indonesia.csv"))


# Table 3: Tverksy + Khaneman - disease problem survey experiment #### 
## WAVE 1, unweighted ##

# save framing: Table S2
table(d1$prospect_gain)/length(na.omit(d1$prospect_gain))
# 53% choose certain, 47% choose risky

# die framing: Table S2
table(d1$prospect_loss)/length(na.omit(d1$prospect_loss))
# 45% choose certain, 55% choose risky

## weighted, for Table 3 and S2 ##
## gain/save ##
# Subset data for relevant categories
subset_save <- d1[d1$prospect_gain %in% c("Program A", "Program B"), ]

# Calculate weighted counts
weighted_save <- with(subset_save, tapply(weight_trimmed, prospect_gain, sum))

# Calculate proportions for Table 3, column 7
weighted_prop_save <- weighted_save / sum(weighted_save)


## loss/die ##
# Subset data for relevant categories
subset_die <- d1[d1$prospect_loss %in% c("Program A", "Program B"), ]

# Calculate weighted counts
weighted_die <- with(subset_die, tapply(weight_trimmed, prospect_loss, sum))

# Calculate proportions for Table 3, column 8
weighted_prop_die <- weighted_die / sum(weighted_die)

## t test for the die column 
subset_die%<>%
  select(ResponseId,weight_trimmed,prospect_loss)%>%
  mutate(choice=1)%>%
  pivot_wider(id_cols=c(ResponseId,weight_trimmed),names_from=prospect_loss,values_from=choice)%>%
  mutate(across(`Program A`:`Program B`,~ifelse(is.na(.),0,.)))


# t.test(x=subset_die$prospect_loss[])
library(weights)
wtd.t.test(x=subset_die$`Program A`,
           y=subset_die$`Program B`,
           weight=subset_die$weight_trimmed,
           weighty=subset_die$weight_trimmed)
t.test(x=subset_die$`Program A`,y=subset_die$`Program B`,alternative=c("two.sided"))

## t test for the save column
subset_save%<>%
  select(ResponseId,weight_trimmed,prospect_gain)%>%
  mutate(choice=1)%>%
  pivot_wider(id_cols=c(ResponseId,weight_trimmed),names_from=prospect_gain,values_from=choice)%>%
  mutate(across(`Program A`:`Program B`,~ifelse(is.na(.),0,.)))


# t.test(x=subset_die$prospect_loss[])
wtd.t.test(x=subset_save$`Program A`,
           y=subset_save$`Program B`,
           weight=subset_save$weight_trimmed,
           weighty=subset_save$weight_trimmed)

## results among sub-groups of respondents for Table S3. each row here is a column in Table S3
table(d1$prospect_loss[d1$demo_educ!="Tertiary education (college or university degree)"])/length(na.omit(d1$prospect_loss[d1$demo_educ!="Tertiary education (college or university degree)"])) 
table(d1$prospect_loss[d1$demo_educ=="Tertiary education (college or university degree)"])/length(na.omit(d1$prospect_loss[d1$demo_educ=="Tertiary education (college or university degree)"])) ## no difference by education
table(d1$prospect_loss[d1$demo_gender=="Female"])/length(na.omit(d1$prospect_loss[d1$demo_gender=="Female"])) ## no differences by gender 
table(d1$prospect_loss[d1$demo_gender!="Female"])/length(na.omit(d1$prospect_loss[d1$demo_gender!="Female"])) ## no differences by gender 
table(d1$prospect_loss[d1$demo_agegroup%in%c("18-29","30-49")==FALSE])/length(na.omit(d1$prospect_loss[d1$demo_agegroup%in%c("18-29","30-49")==FALSE]))## this is the difference
table(d1$prospect_loss[d1$demo_agegroup%in%c("18-29","30-49")==TRUE])/length(na.omit(d1$prospect_loss[d1$demo_agegroup%in%c("18-29","30-49")==TRUE]))## this is the difference

## average weight for the older individuals: 
mean(d1$weight_trimmed[d1$demo_agegroup%in%c("50-59","60+")])

# cost and reach analysis: Table S4 #### 
adsetsreport <- read.csv("indonesia data/indonesia_adsets/IndonesiaAdSetsReport.csv")
indonesia_summary<-adsetsreport%>%
  summarise(
    Impressions=sum(Impressions,na.rm=TRUE),
    Reach=sum(Reach,na.rm=TRUE),
    Clicks=sum(Link.clicks,na.rm=TRUE),
    Survey_results=nrow(d),
    Total_spent=sum(Amount.spent..USD.,na.rm=TRUE),
    Click_through_rate=Clicks/Impressions,
    Completion_rate=Survey_results/Impressions,
    Cost_per_click=Total_spent/Clicks,
    Cost_per_complete=Total_spent/Survey_results
  )

print(indonesia_summary)

## Table S5: costs of targeting #### 

full.sum<-adsetsreport%>%
  group_by(Ad.Set.Name)%>%
  summarise(reach=sum(Reach,na.rm=TRUE),
            impressions=sum(Impressions,na.rm=TRUE),
            spent=sum(Amount.spent..USD.,na.rm=TRUE),
            clicks=sum(Link.clicks,na.rm=TRUE))%>%
  mutate(clickthroughrate=clicks*100/impressions,
         costperclick=spent/clicks)

## problem with the pixel prevented Facebook from recording results for our entrants, 
## so we pull from the survey data to measure the number of people who took the survey. 

# celldata<-bind_rows(d%>%select(cell),adclickers%>%select(cell))
celldata<-d%>%select(cell)
celldata<-data.frame(table(celldata$cell))%>%
  rename(cell=Var1)
celldata%<>%
  mutate(cell=tolower(cell),
         cell=gsub("21-29","21",cell),
         cell=gsub("30-49","30",cell),
         cell=gsub("[+]","",cell),
         cell=gsub("60+","60",cell),
         cell=gsub("50-59","50",cell),
         cell=gsub("[.]","_",cell))

celldata%<>%rename(responses=Freq)
full.sum%<>%left_join(celldata,by=c("Ad.Set.Name"="cell"))
full.sum%<>%
  mutate(completerate=responses*100/impressions,
         costpercomplete=spent/responses)

## data for table S5 #### 
summary(full.sum$clickthroughrate)
summary(full.sum$completerate) 
summary(full.sum$costperclick)
summary(full.sum$costpercomplete) 



## cost by respondent type: table S8 #### 
w.spend<-sum(adsetsreport$Amount.spent..USD.[adsetsreport$Gender=="female"],na.rm=TRUE)
m.spend<-sum(adsetsreport$Amount.spent..USD.[adsetsreport$Gender=="male"],na.rm=TRUE)
## table S8 numbers: 
sum(adsetsreport$Amount.spent..USD.,na.rm=TRUE)/nrow(d) ## total cost per complete averaged across all individuals 
m.spend/nrow(d[d$demogender=="Male",]) ## cost per complete for men
w.spend/nrow(d[d$demogender=="Female",]) ## cost per complete for women


